Loading the data

library(openxlsx)

waterOrg <- read.xlsx("treatability_organic.xlsx", sheet = 2, rows = 11:320, cols = 1:10, na.strings = c("NA", "--", "---", ""))

#TODO: factors, grouping

names(waterOrg) <- c("sample", "site", "surface_subsurface", "collection_method", "raw_treated", "alum_dose", "ph", "uv254", "color", "doc")

library(dplyr)

waterOrg <- waterOrg %>% filter(!is.na(ph) & !is.na(uv254) & !is.na(color) & !is.na(doc) & (!is.na(alum_dose) | raw_treated == "Raw"))

waterOrg[waterOrg$raw_treated == "Raw", "alum_dose"] = 0

sampleSplit <- strsplit(sprintf("%0.1f" ,waterOrg$sample), ".", fixed = TRUE)

waterOrg$sampleA <- 0
waterOrg$sampleB <- 0

for (i in 1:length(sampleSplit))
{
    waterOrg$sampleA[i] <- as.integer(sampleSplit[[i]][1])
    waterOrg$sampleB[i] <- as.integer(sampleSplit[[i]][2])
}

waterOrg$sampleA <- as.factor(waterOrg$sampleA)

waterOrg$logPh <- log(waterOrg$ph)
waterOrg$logUv254 <- log(waterOrg$uv254)
waterOrg$logColor <- log(waterOrg$color)
waterOrg$logDoc <- log(waterOrg$doc)

sites <- unique(waterOrg$site)

dataToUse <- waterOrg
#dataToUse <- waterOrg[waterOrg$site == sites[5],]
#dataToUse <- waterOrg[waterOrg$surface_subsurface == "Runoff",]

modelColor <- lm(data = dataToUse, formula = logDoc ~ logColor)
modelColorUv <- lm(data = dataToUse, formula = logDoc ~ logColor + logUv254)
modelColorUvInteract <- lm(data = dataToUse, formula = logDoc ~ logColor * logUv254)

#modelColorUvSquared <- lm(data = dataToUse, formula = logDoc ~ polym(logColor, logUv254, 2))

predictColor <- predict(modelColor, dataToUse)
predictColorUv <- predict(modelColorUv, dataToUse)
predictColorUvInteract <- predict(modelColorUvInteract, dataToUse)





library(ggplot2)

g <- ggplot(data = waterOrg[waterOrg$sampleA == "328",], mapping = aes(x = alum_dose, y = log(color + 1), group = sampleA, color = sampleA)) + geom_line()

g <- ggplot(data = waterOrg[waterOrg$site == sites[10],], mapping = aes(x = log(alum_dose), y = log(doc), group = sampleA, color = surface_subsurface)) + geom_point() + geom_line() + geom_smooth(method = "lm"); g
## Warning: Removed 9 rows containing non-finite values (stat_smooth).

pg <- ggplot(data = data.frame(predictColor = predictColor, predictColorUv = predictColorUv, predictColorUvInteract = predictColorUvInteract, realDoc = dataToUse$logDoc)) + geom_point(mapping = aes(x = realDoc, y = predictColor)) + geom_point(mapping = aes(x = predictColorUv, y = realDoc), color = "red") + geom_point(mapping = aes(x = predictColorUvInteract, y = realDoc), color = "green")

pg <- ggplot(data = data.frame(predictColor = predictColor, predictColorUv = predictColorUv, predictColorUvInteract = predictColorUvInteract, realDoc = dataToUse$logDoc)) + geom_point(mapping = aes(x = realDoc, y = predictColor - realDoc)) + geom_point(mapping = aes(x = predictColorUv, y = predictColorUv - realDoc), color = "red") + geom_point(mapping = aes(x = predictColorUvInteract, y = predictColorUvInteract - realDoc), color = "green")

library(plotly)
dataToPlot <- data.frame(predictColor = predictColor, predictColorUv = predictColorUv, predictColorUvInteract = predictColorUvInteract, realDoc = dataToUse$logDoc, logColor = dataToUse$logColor, logUv254 = dataToUse$logUv254)

pl <- plot_ly(dataToPlot) %>% add_markers(dataToPlot, x = ~logColor, y = ~logUv254, z = ~predictColorUvInteract, marker = list(color = "rgba(0,0,255,0.5)")) %>% add_markers(dataToPlot, x = ~logColor, y = ~logUv254, z = ~realDoc, marker = list(color = "rgba(255,0,0,0.5)"))
pl